home *** CD-ROM | disk | FTP | other *** search
/ AmigActive 2 / AACD 2.iso / AACD / Magazine / GraphicsCards / StormMesa / src / mmath_asmppc.p < prev    next >
Text File  |  1999-02-04  |  6KB  |  204 lines

  1.  
  ;-----------------------------------------------------------------------
  ; _gl_sqrt / _gl_sqrt__r -- single-precision square root
  ;
  ; In:    f1 = x
  ; Out:   f1 = approximation of sqrt(x); the 0.0 constant when x compares
  ;             equal to zero
  ; Uses:  f0, f5, f7 and the condition field written by fcmpu
  ;
  ; Method: build a first guess from the frsqrte and fres hardware
  ;         estimates, then run two rounds of  y = (x/y + y) * 0.5.
  ; NOTE(review): `ls` is not defined in this file; it is assumed to load
  ;         the single-precision value stored at the named data label.
  ;-----------------------------------------------------------------------
                XDEF    _gl_sqrt__r
                XDEF    _gl_sqrt

_gl_sqrt
_gl_sqrt__r
                ls      f0,_f_0                 ;f0 = 0.0
                ls      f5,_f_0_5               ;f5 = 0.5
                fcmpu   f0,f1
                beq     .done                   ;x == 0: return the 0.0 held in f0

                ; first guess
                frsqrte f7,f1                   ;f7 ~ 1/sqrt(x)
                fres    f0,f7                   ;f0 ~ 1/f7 ~ sqrt(x)
                fmadds  f0,f1,f7,f0             ;f0 = x*f7 + f0 ~ 2*sqrt(x)
                fmuls   f0,f0,f5                ;f0 = y0 (halve the sum)

                ; refinement round 1
                fdivs   f7,f1,f0                ;f7 = x / y0
                fadds   f0,f7,f0                ;f0 = x/y0 + y0
                fmuls   f0,f0,f5                ;f0 = y1

                ; refinement round 2
                fdivs   f7,f1,f0                ;f7 = x / y1
                fadds   f0,f7,f0                ;f0 = x/y1 + y1
                fmuls   f0,f0,f5                ;f0 = y2
.done
                fmr     f1,f0                   ;result goes back in f1
                blr
  24.  
  25. /* This function is inaccurate but extremely fast;
  26.    it is very useful for vector normalization if low
  27.    accuracy is acceptable. */
  28.  
  ;-----------------------------------------------------------------------
  ; _gl_invsqrt / _gl_invsqrt__r -- raw hardware estimate of 1/sqrt(x)
  ;
  ; In:    f1 = x
  ; Out:   f1 = frsqrte(x); no refinement step is applied, so the result
  ;             has only the precision of the estimate instruction
  ; Uses:  f1 only
  ;
  ; Both names are exported with XDEF, so both are defined here as
  ; aliases of the same entry point. Previously only _gl_invsqrt__r had a
  ; label, leaving the exported _gl_invsqrt without a definition.
  ;-----------------------------------------------------------------------
                XDEF    _gl_invsqrt
                XDEF    _gl_invsqrt__r
_gl_invsqrt
_gl_invsqrt__r
                frsqrte f1,f1                   ;f1 ~ 1/sqrt(x)
                blr
  34.  
  35.  
  36.  
  ;-----------------------------------------------------------------------
  ; _fast_exp / _fast_exp__r -- approximate e^x
  ;
  ; In:    f1 = x
  ; Out:   f1 = 2^k * exp(r)   with  k = trunc(x/ln2),  r = x - k*ln2
  ; Uses:  f0, f2-f12, r0, r3, r4, the 8 bytes at -8(r1) below the stack
  ;        pointer, and the static word at _CITF_TEMP+4
  ;
  ; exp(r) is evaluated as   1 + r + r*R1/(2 - R1)
  ;        where             R1 = r - r^2*(p1 + p2*r^2)
  ; 2^k is built by writing the biased exponent directly into the bit
  ; pattern of a floating-point number; k is not range-checked.
  ;
  ; NOTE(review): `ls`, `lf` and `sw` are not defined in this file; they
  ;        are assumed to load a single, load a double, and store a word
  ;        at the named data label respectively.
  ; NOTE(review): _CITF_TEMP is static storage written on every call, so
  ;        the routine is not reentrant -- confirm that is acceptable.
  ;-----------------------------------------------------------------------
                XDEF    _fast_exp__r
                XDEF    _fast_exp

_fast_exp
_fast_exp__r
                ; constants
                lf      f6,_CITF_0              ;f6 = int->double bias ($43300000:$80000000)
                ls      f5,p2                   ;f5 = p2
                ls      f4,p1                   ;f4 = p1
                ls      f3,ln2                  ;f3 = ln(2)
                ls      f2,invln2               ;f2 = 1/ln(2)

                ; k = trunc(x/ln2), as an integer in r3
                fmuls   f7,f2,f1                ;f7 = x / ln2
                fctiwz  f0,f7                   ;convert to integer, rounding toward zero
                stfd    f0,-8(r1)               ;spill the 64-bit FPR image
                lwz     r3,-4(r1)               ;r3 = k (second word of the image)

                ; (double)k: flip the sign bit, drop it into the low word
                ; of the $43300000:xxxxxxxx pattern, subtract the bias
                xoris   r4,r3,$8000
                sw      r4,_CITF_TEMP+4
                lf      f7,_CITF_TEMP
                fsub    f7,f7,f6                ;f7 = (double)k

                ; reduced argument and rational approximation of exp(r)
                fnmsubs f8,f7,f3,f1             ;f8  = r = x - k*ln2
                fmuls   f9,f8,f8                ;f9  = r^2
                fmadds  f10,f5,f9,f4            ;f10 = p1 + p2*r^2
                fnmsubs f10,f10,f9,f8           ;f10 = R1 = r - r^2*(p1 + p2*r^2)
                ls      f11,_f_2                ;f11 = 2.0
                fsubs   f12,f11,f10             ;f12 = 2 - R1
                fmuls   f10,f8,f10              ;f10 = r*R1
                fdivs   f10,f10,f12             ;f10 = r*R1/(2 - R1)
                fadds   f10,f10,f8              ;f10 = r + r*R1/(2 - R1)
                ls      f11,_f_1                ;f11 = 1.0
                fadds   f10,f10,f11             ;f10 = exp(r)

                ; f7 = 2^k, assembled from raw bits
                IFNE    1
                ; double: high word = (k << 20) + $3ff00000, low word = 0
                li      r0,0
                slwi    r4,r3,20
                addis   r4,r4,$3ff0
                stw     r0,-4(r1)
                stw     r4,-8(r1)
                lfd     f7,-8(r1)
                ELSE
                ; single: word = (k << 23) + $3f800000
                slwi    r4,r3,23
                addis   r4,r4,$3f80
                stw     r4,-4(r1)
                lfs     f7,-4(r1)
                ENDC

                fmul    f1,f7,f10               ;f1 = 2^k * exp(r) = exp(x)
                blr
  81.  
  82.  
  83.  
  ;-----------------------------------------------------------------------
  ; _fast_log / _fast_log__r -- approximate natural logarithm
  ;
  ; In:    f1 = x (stored and picked apart as a single-precision pattern)
  ; Out:   f1 = log(1+f) + k*ln2, where k is the exponent field of x
  ;             minus 127 and 1+f is x with its exponent field forced to 127
  ; Uses:  f2-f12, r3, r4, r5, CR0 (andi.), the 4 bytes at -4(r1) below
  ;        the stack pointer, and the static word at _CITF_TEMP+4
  ;
  ; log(1+f) is evaluated with s = f/(2+f) as
  ;        2*s + s * s^2 * (l1 + l2*s^2)
  ; There is no branch for zero, negative, infinite or NaN inputs: the
  ; bit fields are used exactly as found.
  ;
  ; NOTE(review): `ls`, `lf` and `sw` are not defined in this file; they
  ;        are assumed to load a single, load a double, and store a word
  ;        at the named data label respectively.
  ; NOTE(review): _CITF_TEMP is static storage written on every call, so
  ;        the routine is not reentrant -- confirm that is acceptable.
  ;-----------------------------------------------------------------------
                XDEF    _fast_log__r
                XDEF    _fast_log

_fast_log
_fast_log__r
                ; constants
                ls      f10,l2                  ;f10 = l2
                ls      f9,l1                   ;f9  = l1
                ls      f8,ln2                  ;f8  = ln(2)

                ; split x into exponent k and mantissa 1+f
                stfs    f1,-4(r1)
                lwz     r3,-4(r1)               ;r3 = raw single-precision bits of x
                srwi    r4,r3,23
                andi.   r4,r4,$ff               ;r4 = 8-bit exponent field
                subi    r4,r4,127               ;r4 = k (bias removed)
                oris    r3,r3,$ff80             ;set the top nine bits ...
                xoris   r3,r3,$ff80             ;... then flip them: sign and exponent cleared
                oris    r3,r3,$3f80             ;exponent field := 127
                stw     r3,-4(r1)
                lfs     f2,-4(r1)               ;f2 = 1+f

                ; s = f/(2+f)
                ls      f3,_f_1                 ;f3 = 1.0
                fsubs   f5,f2,f3                ;f5 = f
                fadds   f4,f2,f3                ;f4 = 2+f
                fdivs   f6,f5,f4                ;f6 = s

                ; log(1+f) = 2s + s*s^2*(l1 + l2*s^2)
                fmuls   f7,f6,f6                ;f7  = s^2
                fmadds  f11,f10,f7,f9           ;f11 = l1 + l2*s^2
                fmuls   f12,f11,f7              ;f12 = s^2*(l1 + l2*s^2)
                fmuls   f11,f12,f6              ;f11 = s*s^2*(l1 + l2*s^2)
                fadds   f12,f11,f6              ;f12 = f11 + s
                fadds   f11,f12,f6              ;f11 = f11 + 2s = log(1+f)

                ; (double)k via the $43300000:xxxxxxxx bias trick
                lf      f2,_CITF_0              ;f2 = bias ($43300000:$80000000)
                xoris   r5,r4,$8000             ;flip the sign bit of k
                sw      r5,_CITF_TEMP+4
                lf      f3,_CITF_TEMP
                fsub    f3,f3,f2                ;f3 = (double)k

                fmuls   f3,f3,f8                ;f3 = k*ln2
                fadds   f1,f11,f3               ;f1 = log(1+f) + k*ln2 = log(x)
                blr
  120.  
  ;-----------------------------------------------------------------------
  ; _fast_pow / _fast_pow__r -- approximate x^y
  ;
  ; In:    f1 = x, f2 = y
  ; Out:   f1 = _fast_exp(y * _fast_log(x))
  ; Stack: opens a 64-byte frame; the return address is saved at 8(r1) of
  ;        the caller's frame and y is parked (as a single) at 60(r1)
  ;        while _fast_log runs
  ; Uses:  r0, LR, plus everything _fast_log and _fast_exp use
  ;-----------------------------------------------------------------------
                XDEF    _fast_pow__r
                XDEF    _fast_pow

_fast_pow
_fast_pow__r
                ; prologue
                mflr    r0
                stw     r0,8(r1)                ;save return address
                stwu    r1,-64(r1)              ;open frame, store back chain
                stfs    f2,60(r1)               ;keep y across the call

                bl      _fast_log               ;f1 = log(x)
                lfs     f2,60(r1)               ;f2 = y again
                fmuls   f1,f1,f2                ;f1 = y*log(x)
                bl      _fast_exp               ;f1 = exp(y*log(x))

                ; epilogue
                lwz     r0,72(r1)               ;64+8: the slot written in the prologue
                addi    r1,r1,64                ;drop the frame
                mtlr    r0
                blr
  138.  
  ;-----------------------------------------------------------------------
  ; _fast_invsqrt / _fast_invsqrt__r -- refined 1/sqrt(x)
  ;
  ; In:    f1 = x
  ; Out:   f1 = approximation of 1/sqrt(x); when x compares equal to zero
  ;             f1 is returned untouched
  ; Uses:  f0, f2-f6 and the condition field written by fcmpu
  ;
  ; Method: start from the frsqrte estimate y and apply
  ;         y = y * (1.5 - (0.5*x) * y^2)   three times.
  ; NOTE(review): `ls` is not defined in this file; it is assumed to load
  ;         the single-precision value stored at the named data label.
  ;-----------------------------------------------------------------------
                XDEF    _fast_invsqrt__r
                XDEF    _fast_invsqrt
_fast_invsqrt
_fast_invsqrt__r
                ls      f5,_f_0_5               ;f5 = 0.5
                fsubs   f0,f5,f5                ;f0 = 0.0
                fcmpu   f0,f1
                fadd    f3,f5,f5                ;f3 = 1.0
                fadd    f3,f3,f5                ;f3 = 1.5
                beq     .zero_in                ;x == 0: leave f1 as it is

                fmuls   f6,f1,f5                ;f6 = 0.5*x
                frsqrte f0,f1                   ;f0 = y0, hardware estimate

                ; iteration 1
                fmuls   f4,f0,f0                ;f4 = y0^2
                fnmsubs f2,f4,f6,f3             ;f2 = 1.5 - y0^2 * 0.5x
                fmuls   f0,f2,f0                ;f0 = y1

                ; iteration 2
                fmuls   f4,f0,f0                ;f4 = y1^2
                fnmsubs f2,f4,f6,f3             ;f2 = 1.5 - y1^2 * 0.5x
                fmuls   f0,f2,f0                ;f0 = y2

                ; iteration 3, result written straight to f1
                fmuls   f4,f0,f0                ;f4 = y2^2
                fnmsubs f2,f4,f6,f3             ;f2 = 1.5 - y2^2 * 0.5x
                fmuls   f1,f2,f0                ;f1 = y3
.zero_in
                blr
  162.  
  ;-----------------------------------------------------------------------
  ; _fast_sqrt / _fast_sqrt__r -- square root via refined 1/sqrt(x)
  ;
  ; In:    f1 = x
  ; Out:   f1 = approximation of sqrt(x); when x compares equal to zero
  ;             f1 is returned untouched
  ; Uses:  f0, f2-f6 and the condition field written by fcmpu
  ;
  ; Method: refine the frsqrte estimate y of 1/sqrt(x) with three rounds
  ;         of  y = y * (1.5 - (0.5*x) * y^2),  then return  x * y.
  ;
  ; The final step used to be `fres` on the refined value. fres is only a
  ; reciprocal *estimate* instruction, so it threw away the precision the
  ; three refinement rounds had just produced. Multiplying x by the
  ; refined 1/sqrt(x) costs the same single instruction and keeps it.
  ;
  ; NOTE(review): `ls` is not defined in this file; it is assumed to load
  ;         the single-precision value stored at the named data label.
  ;-----------------------------------------------------------------------
                XDEF    _fast_sqrt
                XDEF    _fast_sqrt__r
_fast_sqrt
_fast_sqrt__r
                ls      f5,_f_0_5               ;f5 = 0.5
                fsubs   f0,f5,f5                ;f0 = 0.0
                fcmpu   f0,f1
                fadd    f3,f5,f5                ;f3 = 1.0
                fadd    f3,f3,f5                ;f3 = 1.5
                beq     .done                   ;x == 0: leave f1 as it is

                frsqrte f0,f1                   ;f0 = y0, hardware estimate of 1/sqrt(x)
                fmuls   f6,f1,f5                ;f6 = 0.5*x

                ; iteration 1
                fmuls   f4,f0,f0                ;f4 = y0^2
                fnmsubs f2,f4,f6,f3             ;f2 = 1.5 - y0^2 * 0.5x
                fmuls   f0,f2,f0                ;f0 = y1

                ; iteration 2
                fmuls   f4,f0,f0                ;f4 = y1^2
                fnmsubs f2,f4,f6,f3             ;f2 = 1.5 - y1^2 * 0.5x
                fmuls   f0,f2,f0                ;f0 = y2

                ; iteration 3
                fmuls   f4,f0,f0                ;f4 = y2^2
                fnmsubs f2,f4,f6,f3             ;f2 = 1.5 - y2^2 * 0.5x
                fmuls   f0,f2,f0                ;f0 = y3 ~ 1/sqrt(x)

                fmuls   f1,f1,f0                ;f1 = x * y3 ~ sqrt(x)
.done
                blr
  187.  
  ;-----------------------------------------------------------------------
  ; Constants and scratch storage for the math routines in this file.
  ; Layout and order are significant: _CITF_TEMP must stay an 8-byte
  ; $43300000:xxxxxxxx pair because the routines overwrite only its second
  ; word (_CITF_TEMP+4) and then load the pair as one double.
  ;-----------------------------------------------------------------------
                section data
_f_0            dc.s    0.0                     ;0.0
_f_0_5          dc.s    0.5                     ;0.5
_f_10_e_60      dc.l    $4c98e45c,$b7e8c8e4     ;raw 64-bit pattern; presumably the double 10e60 -- verify
_f_10_e_m60     dc.l    $33b011c3,$d1cf8112     ;raw 64-bit pattern; presumably the double 10e-60 -- verify
ln2             dc.s    0.69314718              ;ln(2)
invln2          dc.s    1.442695042             ;1/ln(2)
p1              dc.s    0.1666666667            ;_fast_exp coefficient (1/6)
p2              dc.s    -0.002777777778         ;_fast_exp coefficient (-1/360)
l1              dc.s    0.6666666666            ;_fast_log coefficient (2/3)
l2              dc.s    0.4000000000            ;_fast_log coefficient (2/5)
_CITF_0         dc.l    $43300000,$80000000     ;bias subtracted in the int->double conversion
_CITF_TEMP      dc.l    $43300000,$00000000     ;scratch: second word is rewritten at run time
_f_1            dc.s    1.0                     ;1.0
_f_2            dc.s    2.0                     ;2.0
  203.  
  204.